Completed
Push — master ( 7f1825...1a8865 )
by Elbert
01:01
created

wappalyzer.js ➔ resolveImplies   B

Complexity

Conditions 2
Paths 17

Size

Total Lines 36

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 2
c 2
b 0
f 0
nc 17
nop 2
dl 0
loc 36
rs 8.8571

1 Function

Rating   Name   Duplication   Size   Complexity  
B wappalyzer.js ➔ ... ➔ ??? 0 26 3
1
/**
2
 * Wappalyzer v5
3
 *
4
 * Created by Elbert Alias <[email protected]>
5
 *
6
 * License: GPLv3 http://www.gnu.org/licenses/gpl-3.0.txt
7
 */
8
9
'use strict';
10
11
/**
 * Patterns used by trackDetectedApps() to decide whether a page may be tracked.
 *
 * hostname:          tested against the hostname; an optional literal "www."
 *                    prefix followed by a dotted name ending in a TLD-like suffix
 * hostnameBlacklist: tested against the full URL; matches local, development,
 *                    staging, test, demo, admin, google and cache locations
 */
const validation = {
  // The dot after "www" is escaped so that only a literal "www." is captured as the prefix
  hostname: /(www\.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/,
  hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/
};
15
16
// Core object. The functions in this file read application definitions from
// `apps` and call methods on `driver` (log, ping, getRobotsTxt, displayApps,
// document), so both are expected to be populated by the embedding code.
var wappalyzer = {
  apps: {},
  categories: {},
  driver: {}
};

// Detected Application instances, keyed by URL and then by application name
var detected = {};

// Per-hostname tracking data and detected ads; both are handed to the driver
// and then reset by wappalyzer.ping()
var hostnameCache = {};
var adCache = [];

wappalyzer.config = {
  websiteURL: 'https://wappalyzer.com/',
  twitterURL: 'https://twitter.com/Wappalyzer',
  githubURL: 'https://github.com/AliasIO/Wappalyzer',
};
31
32
/**
 * Forward a log message to the driver.
 *
 * A falsy `source` is sent as '' and a falsy `type` is sent as 'debug'.
 */
wappalyzer.log = (message, source, type) => {
  const origin = source || '';
  const level = type || 'debug';

  wappalyzer.driver.log(message, origin, level);
};
38
39
/**
 * Analyze one page: run every known application against the supplied data,
 * drop applications that were not detected, resolve excludes and implies,
 * cache and track the result, and hand the per-URL cache to the driver.
 *
 * `data` is modified in place: `data.url` is set to the URL without its hash
 * and a non-string `data.html` is replaced by ''.
 */
wappalyzer.analyze = (hostname, url, data, context) => {
  const apps = {};

  // Remove hash from URL
  url = url.split('#')[0];
  data.url = url;

  if ( typeof data.html !== 'string' ) {
    data.html = '';
  }

  if ( detected[url] === undefined ) {
    detected[url] = {};
  }

  for ( const appName of Object.keys(wappalyzer.apps) ) {
    // Reuse the instance cached for this URL when there is one
    const cached = detected[url] && detected[url][appName];
    const app = cached ? cached : new Application(appName, wappalyzer.apps[appName]);

    apps[appName] = app;

    if ( url ) {
      analyzeUrl(app, url);
    }

    if ( data.html ) {
      analyzeHtml(app, data.html);
      analyzeScript(app, data.html);
      analyzeMeta(app, data.html);
    }

    if ( data.headers ) {
      analyzeHeaders(app, data.headers);
    }

    if ( data.env ) {
      analyzeEnv(app, data.env);
    }

    if ( data.robotsTxt ) {
      analyzeRobotsTxt(app, data.robotsTxt);
    }
  }

  // Keep only applications that were detected with a non-zero confidence
  for ( const appName of Object.keys(apps) ) {
    const app = apps[appName];
    const confirmed = app.detected && app.getConfidence();

    if ( !confirmed ) {
      delete apps[app.name];
    }
  }

  resolveExcludes(apps);
  resolveImplies(apps, url);

  cacheDetectedApps(apps, url);
  trackDetectedApps(apps, url, hostname, data.html);

  const names = Object.keys(apps);

  if ( names.length ) {
    wappalyzer.log(names.length + ' apps detected: ' + names.join(', ') + ' on ' + url, 'core');
  }

  wappalyzer.driver.displayApps(detected[url], context);
};
101
102
/**
 * Append a detected ad to the ad cache
 */
wappalyzer.cacheDetectedAds = ad => {
  adCache[adCache.length] = ad;
};
108
109
/**
 * Check a URL against the robots.txt rules supplied by the driver.
 *
 * Returns a promise that resolves (with no value) when no disallow rule is a
 * prefix of the URL's pathname, and rejects (with no value) when one is.
 * The promise also rejects when the URL cannot be parsed or when the driver
 * fails to deliver the rules, so that callers are never left waiting on a
 * promise that does not settle.
 */
wappalyzer.robotsTxtAllows = url => {
  return new Promise((resolve, reject) => {
    const parsed = wappalyzer.parseUrl(url);

    wappalyzer.driver.getRobotsTxt(parsed.host, parsed.protocol === 'https:')
      .then(robotsTxt => {
        const disallowed = robotsTxt.some(disallow => parsed.pathname.indexOf(disallow) === 0);

        if ( disallowed ) {
          reject();
        } else {
          resolve();
        }
      })
      // Driver failures and malformed rule lists settle the promise as well
      .catch(reject);
  });
};
128
129
/**
 * Parse a URL by assigning it to the href of an anchor element created
 * through the driver's document.
 *
 * Returns that anchor, extended with a `canonical` property made of
 * protocol, host and pathname.
 */
wappalyzer.parseUrl = url => {
  const anchor = wappalyzer.driver.document.createElement('a');

  anchor.href = url;

  const origin = anchor.protocol + '//' + anchor.host;

  anchor.canonical = origin + anchor.pathname;

  return anchor;
};
141
142
/**
 * Extract the Disallow rules that apply to this crawler from a robots.txt body.
 *
 * Only rules that follow a "User-agent: *" or "User-agent: wappalyzer" line
 * (case-insensitive) are collected. Both LF and CRLF line endings are
 * accepted, and "#" comments and surrounding whitespace are ignored.
 *
 * Returns an array of disallowed path prefixes, in file order.
 */
wappalyzer.parseRobotsTxt = robotsTxt => {
  const disallow = [];
  let userAgent;

  robotsTxt.split(/\r?\n/).forEach(rawLine => {
    // Without this clean-up a trailing "\r", space or comment would stop the
    // `(.+)$` patterns below from matching or end up inside the captured value
    const line = rawLine.replace(/#.*$/, '').trim();

    let matches = /^User-agent:\s*(.+)$/i.exec(line);

    if ( matches ) {
      userAgent = matches[1].toLowerCase();

      return;
    }

    if ( userAgent === '*' || userAgent === 'wappalyzer' ) {
      matches = /^Disallow:\s*(.+)$/i.exec(line);

      if ( matches ) {
        disallow.push(matches[1]);
      }
    }
  });

  return disallow;
};
167
168
/**
 * Hand the hostname and ad caches to the driver once either of them holds
 * at least 50 entries, then start over with empty caches.
 */
wappalyzer.ping = () => {
  const hostnameCount = Object.keys(hostnameCache).length;

  if ( hostnameCount < 50 && adCache.length < 50 ) {
    return;
  }

  wappalyzer.driver.ping(hostnameCache, adCache);

  hostnameCache = {};
  adCache = [];
};
179
180
/**
 * Wrap a string in an array; any other value is returned unchanged
 */
function asArray(value) {
  if ( typeof value === 'string' ) {
    return [ value ];
  }

  return value;
}
186
187
/**
 * Parse apps.json patterns
 *
 * A pattern is a string whose first segment is a regular expression and whose
 * remaining segments, separated by a literal "\;", are "key:value" attributes.
 * Each pattern becomes an object with `string`, `regex` (case-insensitive)
 * and one property per attribute.
 *
 * - A string or an array of strings yields an array of parsed patterns.
 * - An object yields an object with the same own keys, each holding an array
 *   of parsed patterns.
 * - A missing value yields an empty object (which has no `length`).
 */
function parsePatterns(patterns) {
  var parsed = {};
  var groups = patterns;

  // Convert string to object containing array containing string
  if ( typeof patterns === 'string' || Array.isArray(patterns) ) {
    groups = {
      main: asArray(patterns)
    };
  }

  // Object.keys() visits own properties only, so keys inherited from a
  // prototype are never treated as pattern groups
  Object.keys(groups || {}).forEach(key => {
    parsed[key] = asArray(groups[key]).map(pattern => {
      var attrs = {};

      pattern.split('\\;').forEach((attr, i) => {
        if ( i ) {
          // Key value pairs; the value itself may contain colons
          attr = attr.split(':');

          if ( attr.length > 1 ) {
            attrs[attr.shift()] = attr.join(':');
          }
        } else {
          attrs.string = attr;

          try {
            // The RegExp constructor takes the source as-is; slashes need no escaping
            attrs.regex = new RegExp(attr, 'i');
          } catch (e) {
            // NOTE(review): the empty fallback expression matches any input,
            // so an invalid pattern counts as a match - confirm this is intended
            attrs.regex = new RegExp();

            wappalyzer.log(e + ': ' + attr, 'core', 'error');
          }
        }
      });

      return attrs;
    });
  });

  // Convert back to array if the original pattern list was an array (or string)
  if ( 'main' in parsed ) {
    parsed = parsed.main;
  }

  return parsed;
}
238
239
/**
 * Remove from `apps` (in place) every application that is named in the
 * `excludes` property of any application present in `apps`.
 */
function resolveExcludes(apps) {
  const names = Object.keys(apps);
  const banned = [];

  // Only applications that were themselves detected can exclude others
  for ( const name of names ) {
    const excludes = apps[name].props.excludes;

    if ( excludes ) {
      asArray(excludes).forEach(entry => {
        banned.push(entry);
      });
    }
  }

  // Remove excluded applications
  names
    .filter(name => banned.indexOf(name) !== -1)
    .forEach(name => {
      delete apps[name];
    });
}
260
261
/**
 * Add to `apps` (in place) every application implied by an application
 * already in `apps`, and carry the implying application's confidence over.
 *
 * Each entry of `props.implies` is a pattern string whose first segment is
 * the implied application's name; an optional `confidence` attribute
 * (a percentage) scales the confidence that is carried over.
 *
 * Implied applications that are not defined in `wappalyzer.apps` are logged
 * and skipped.
 */
function resolveImplies(apps, url) {
  var checkImplies = true;

  // Implied applications
  // Run several passes as implied apps may imply other apps
  while ( checkImplies ) {
    checkImplies = false;

    Object.keys(apps).forEach(appName => {
      var app = apps[appName];

      // The implies list lives on the application's definition (`props`)
      if ( !app || !app.props || !app.props.implies ) {
        return;
      }

      asArray(app.props.implies).forEach(implied => {
        implied = parsePatterns(implied)[0];

        if ( !wappalyzer.apps[implied.string] ) {
          wappalyzer.log('Implied application ' + implied.string + ' does not exist', 'core', 'warn');

          return;
        }

        if ( !( implied.string in apps ) ) {
          // Give the implied app its own definition so that its implies are
          // followed on the next pass, and mark it as detected
          apps[implied.string] = detected[url] && detected[url][implied.string] ? detected[url][implied.string] : new Application(implied.string, wappalyzer.apps[implied.string], true);

          checkImplies = true;
        }

        // Apply app confidence to implied app
        Object.keys(app.confidence).forEach(id => {
          apps[implied.string].confidence[id + ' implied by ' + appName] = app.confidence[id] * ( implied.confidence ? implied.confidence / 100 : 1 );
        });
      });
    });
  }
}
297
298
/**
 * Cache detected applications
 *
 * Stores every application in `apps` under `detected[url]`, then triggers a
 * ping when the driver supports one. `detected[url]` must already exist.
 */
function cacheDetectedApps(apps, url) {
  Object.keys(apps).forEach(appName => {
    // Per URL. The cache holds the very same instance, so its confidence
    // values need no separate copy.
    detected[url][appName] = apps[appName];
  });

  // `!x instanceof Function` parses as `(!x) instanceof Function`, which is
  // always false; the check is written out so that it actually takes effect.
  // Caching above must happen regardless, as the cache is what gets displayed.
  if ( wappalyzer.driver.ping instanceof Function ) {
    wappalyzer.ping();
  }
}
317
318
/**
 * Track detected applications
 *
 * For every application in `apps` whose cached confidence for `url` is at
 * least 100, and provided the hostname passes `validation` and robots.txt
 * allows the URL, a hit (and the version, when known) is recorded in
 * `hostnameCache`. The robots.txt check is asynchronous, so hits are recorded
 * after this function has returned.
 *
 * When the hostname is already in the cache, the page language found in the
 * `<html lang="…">` attribute is stored as well.
 *
 * Does nothing when the driver has no `ping` function.
 */
function trackDetectedApps(apps, url, hostname, html) {
  // `!x instanceof Function` parses as `(!x) instanceof Function`, which is
  // always false; the parentheses make the guard effective
  if ( !( wappalyzer.driver.ping instanceof Function ) ) {
    return;
  }

  Object.keys(apps).forEach(appName => {
    var app = apps[appName];

    if ( detected[url][appName].getConfidence() >= 100 ) {
      if ( validation.hostname.test(hostname) && !validation.hostnameBlacklist.test(url) ) {
        wappalyzer.robotsTxtAllows(url)
          .then(() => {
            if ( !( hostname in hostnameCache ) ) {
              hostnameCache[hostname] = {
                applications: {},
                meta: {}
              };
            }

            if ( !( appName in hostnameCache[hostname].applications ) ) {
              hostnameCache[hostname].applications[appName] = {
                hits: 0
              };
            }

            hostnameCache[hostname].applications[appName].hits ++;

            if ( app.version ) {
              hostnameCache[hostname].applications[appName].version = app.version;
            }
          })
          // Reported through the driver's logger rather than the console
          .catch(() => wappalyzer.log('Disallowed in robots.txt: ' + url, 'core'));
      }
    }
  });

  // Additional information
  if ( hostname in hostnameCache ) {
    var match = html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i);

    if ( match && match.length ) {
      hostnameCache[hostname].meta['language'] = match[1];
    }
  }

  wappalyzer.ping();
}
366
367
/**
 * Analyze URL
 *
 * Registers a 'url' detection for every pattern in `app.props.url` that
 * matches the URL.
 */
function analyzeUrl(app, url) {
  const patterns = parsePatterns(app.props.url);

  // An application without URL patterns yields a value with no length
  if ( !patterns.length ) {
    return;
  }

  for ( const pattern of patterns ) {
    if ( pattern.regex.test(url) ) {
      addDetected(app, pattern, 'url', url);
    }
  }
}
381
382
/**
 * Analyze HTML
 *
 * Registers an 'html' detection for every pattern in `app.props.html` that
 * matches the markup.
 */
function analyzeHtml(app, html) {
  const patterns = parsePatterns(app.props.html);

  // An application without HTML patterns yields a value with no length
  if ( !patterns.length ) {
    return;
  }

  patterns.forEach(pattern => {
    const found = pattern.regex.test(html);

    if ( found ) {
      addDetected(app, pattern, 'html', html);
    }
  });
}
396
397
/**
 * Analyze script tag
 *
 * Registers a 'script' detection for every combination of a pattern in
 * `app.props.script` and a script `src` value in the markup that match.
 */
function analyzeScript(app, html) {
  const patterns = parsePatterns(app.props.script);

  // An application without script patterns yields a value with no length
  if ( !patterns.length ) {
    return;
  }

  // Collect the src value of every script tag once, in document order
  const scriptTag = new RegExp('<script[^>]+src=("|\')([^"\']+)', 'ig');
  const sources = [];
  let tag;

  while ( ( tag = scriptTag.exec(html) ) ) {
    sources.push(tag[2]);
  }

  patterns.forEach(pattern => {
    sources.forEach(src => {
      if ( pattern.regex.test(src) ) {
        addDetected(app, pattern, 'script', src);
      }
    });
  });
}
416
417
/**
 * Analyze meta tag
 *
 * `app.props.meta` maps a meta name (or property) to its patterns. For every
 * `<meta …>` tag whose name/property attribute equals one of those keys
 * (case-insensitive), the tag's `content` value is tested against the key's
 * patterns and each match is registered as a 'meta' detection.
 */
function analyzeMeta(app, html) {
  var regex = /<meta[^>]+>/ig;
  var patterns = parsePatterns(app.props.meta);
  var match;

  // Own keys only, and one name matcher per key, built once instead of once
  // per tag
  var matchers = Object.keys(patterns).map(meta => ({
    meta: meta,
    name: new RegExp('(name|property)=["\']' + meta + '["\']', 'i')
  }));

  if ( !matchers.length ) {
    return;
  }

  while ( ( match = regex.exec(html) ) ) {
    var tag = match[0];

    matchers.forEach(matcher => {
      if ( !matcher.name.test(tag) ) {
        return;
      }

      var content = tag.match(/content=("|')([^"']+)("|')/i);

      if ( !content ) {
        return;
      }

      patterns[matcher.meta].forEach(pattern => {
        if ( pattern.regex.test(content[2]) ) {
          addDetected(app, pattern, 'meta', content[2], matcher.meta);
        }
      });
    });
  }
}
440
441
/**
 * Analyze response headers
 *
 * `app.props.headers` maps a header name to its patterns. Names are
 * lower-cased before they are looked up in `headers`; every pattern that
 * matches the header's value is registered as a 'headers' detection.
 */
function analyzeHeaders(app, headers) {
  const patterns = parsePatterns(app.props.headers);

  if ( !headers ) {
    return;
  }

  for ( const header of Object.keys(patterns) ) {
    // Patterns are keyed by the name as written; `headers` is read by the
    // lower-case name
    const name = header.toLowerCase();

    for ( const pattern of patterns[header] ) {
      const matched = name in headers && pattern.regex.test(headers[name]);

      if ( matched ) {
        addDetected(app, pattern, 'headers', headers[name], name);
      }
    }
  }
}
459
460
/**
 * Analyze environment variables
 *
 * Tests every value of `envs` against every pattern in `app.props.env` and
 * registers each match as an 'env' detection.
 */
function analyzeEnv(app, envs) {
  const patterns = parsePatterns(app.props.env);

  // An application without env patterns yields a value with no length
  if ( !patterns.length ) {
    return;
  }

  for ( const pattern of patterns ) {
    for ( const env of Object.keys(envs) ) {
      const value = envs[env];

      if ( pattern.regex.test(value) ) {
        addDetected(app, pattern, 'env', value);
      }
    }
  }
}
476
477
/**
 * Analyze robots.txt
 *
 * Registers a 'robotsTxt' detection for every pattern in
 * `app.props.robotsTxt` that matches the robots.txt content.
 */
function analyzeRobotsTxt(app, robotsTxt) {
  const patterns = parsePatterns(app.props.robotsTxt);

  // An application without robots.txt patterns yields a value with no length
  if ( !patterns.length ) {
    return;
  }

  patterns
    .filter(pattern => pattern.regex.test(robotsTxt))
    .forEach(pattern => {
      addDetected(app, pattern, 'robotsTxt', robotsTxt);
    });
}
491
492
/**
 * Mark application as detected, set confidence and version
 *
 * The confidence is stored under a key built from `type`, the optional `key`
 * and the pattern's regular expression. It is stored as a number so that the
 * values can be summed; a pattern without a usable `confidence` counts as 100.
 *
 * When the pattern has a `version` template, back references (`\1`) and
 * ternaries (`\1?a:b`) in it are resolved against the groups captured from
 * `value`. The result replaces `app.version` unless the version already
 * stored is longer.
 */
function addDetected(app, pattern, type, value, key) {
  app.detected = true;

  // Set confidence level. Pattern attributes are parsed from strings, and
  // adding strings would concatenate them instead of summing them.
  var confidence = Number.parseInt(pattern.confidence, 10);

  app.confidence[type + ' ' + ( key ? key + ' ' : '' ) + pattern.regex] = Number.isNaN(confidence) ? 100 : confidence;

  // Detect version number
  if ( !pattern.version ) {
    return;
  }

  var version = pattern.version;
  var matches = pattern.regex.exec(value);

  if ( !matches ) {
    return;
  }

  matches.forEach((match, i) => {
    // Parse ternary operator
    var ternary = new RegExp('\\\\' + i + '\\?([^:]+):(.*)$').exec(version);

    if ( ternary && ternary.length === 3 ) {
      version = version.replace(ternary[0], match ? ternary[1] : ternary[2]);
    }

    // Replace back references
    version = version.replace(new RegExp('\\\\' + i, 'g'), match || '');
  });

  // Use the longest detected version number; a version of equal length
  // replaces the stored one
  var current = app.version || '';

  if ( version && version.length >= current.length ) {
    app.version = version;
  }
}
531
532
/**
 * Application class
 *
 * Holds the detection state of one application for one URL: its definition
 * (`props`), the confidence contributed by each matched pattern, and the
 * detected version.
 */
class Application {
  /**
   * @param name     application name
   * @param props    the application's definition
   * @param detected truthy when the application is already known to be present
   */
  constructor(name, props, detected) {
    this.confidence      = {};
    this.confidenceTotal = 0;
    this.detected        = Boolean(detected);
    this.excludes        = [];
    this.name            = name;
    this.props           = props;
    this.version         = '';
  }

  /**
   * Calculate confidence total
   *
   * Sums the own entries of `confidence`, caps the result at 100, stores it
   * in `confidenceTotal` and returns it. Entries are converted to numbers
   * first so that string values add up instead of being concatenated;
   * entries that are not numeric count as 0.
   */
  getConfidence() {
    var total = Object.keys(this.confidence).reduce((sum, id) => {
      return sum + ( Number(this.confidence[id]) || 0 );
    }, 0);

    this.confidenceTotal = Math.min(total, 100);

    return this.confidenceTotal;
  }
}
559
560
// Export the core object when a CommonJS `module` object is available
if ( typeof module === 'object' ) {
  module.exports = wappalyzer;
}
563